## --------------------------------------- ##
## cluster setup 
## --------------------------------------- ##
## i <- as.numeric(Sys.getenv("SLURM_ARRAY_TASK_ID")) # this should be 1--50
## Outer simulation loop.
## For every index i the script:
##   * starts a local parallel cluster,
##   * draws the error term for sample size n = floor(p * delta[i]),
##   * runs, in parallel over the entries of `rho`, `n_iter` replications that
##     compare out-of-sample prediction error of
##       (1) lasso with a break point estimated from lasso residuals,
##       (2) lasso fitted separately on the true ("oracle") regimes,
##       (3) BridgeChangeReg with one break,
##   * stores the per-rho medians (one row per rho, columns in the order
##     lasso / oracle lasso / bridge) in an .rds file indexed by i.
## Relies on script-level objects defined outside this block:
## length.out, p, delta, sigma, rho, n_iter, n_miss.
for (i in seq_len(length.out)) {

  ## Use roughly half of the detected cores, but always a whole number >= 1
  ## (detectCores() can return NA or 1, and an odd count halves to a fraction).
  n_cores   <- detectCores()
  n_workers <- if (is.na(n_cores)) 1L else max(1L, n_cores %/% 2L)
  cl <- makeCluster(n_workers)
  registerDoParallel(cl)

  ## tryCatch(..., finally = ) guarantees the workers are shut down even when
  ## the simulation body raises an error; the error itself still propagates.
  tryCatch({

    ## draw error term and n
    set.seed(3893)
    n    <- floor(p * delta[i])
    eta  <- rnorm(n)
    etaz <- sigma * eta
    n_break <- floor(n / 2)   # first n_break rows use beta1, the rest beta2

    # doRNGseed(1234)
    sim_out <- foreach(j = seq_along(rho), .combine = "rbind", .packages= c("BridgeChange", "glmnet", "MCMCpack", "mvnfast")) %dorng% ({

      cat("Now at i = ", i, " j = ", j ,"\n")
      ## number of non-zero coefficients per regime
      k <- ceiling(n * rho[j])

      ## per-replication prediction errors of the three fitted methods
      l2_error <- save_lasso_ora <- save_bridge <- rep(NA, n_iter)

      for (iter in seq_len(n_iter)) {
        ## NOTE(review): y and X are created with `<<-` further down, so these
        ## rm() calls act on the local frame and may only emit "object not
        ## found" warnings. Kept unchanged because BridgeChangeReg(y ~ X) may
        ## depend on where y and X live -- confirm before touching.
        rm(y); rm(X);

        ## gen data ***********************************************************
        Xfull <- as.matrix(faux::rnorm_multi(n = n,  mu = rep(0, p), r = 0.7))
        ## Xfull <- rmvn(n, rep(0, p), Sigma)
        # Xfull <- matrix(rnorm(n * p), nrow = n, ncol = p)

        # regime specific coef: k N(0, 3) draws and p - k zeros, shuffled
        beta1 <- sample(c(rnorm(k, 0, 3), rep(0, p - k)))
        beta2 <- sample(c(rnorm(k, 0, 3), rep(0, p - k)))
        beta  <- c(beta1, beta2)

        # draw data
        yfull <- rep(NA, n)
        yfull[1:n_break] <- Xfull[1:n_break,] %*% beta1 + etaz[1:n_break]
        yfull[(n_break+1):n] <- Xfull[(n_break+1):n, ] %*% beta2 + etaz[(n_break+1):n]

        ## cv setup: id_use == 0 marks held-out rows, id_use == 1 training rows
        n_out  <- floor(n * n_miss)
        id_use <- sample(c(rep(0, n_out), rep(1, n - n_out)))
        y <<- yfull[id_use == 1]; X <<- Xfull[id_use == 1, ]
        n0 <- sum(id_use == 0)
        n1 <- sum(id_use == 1)
        state_true <- c(rep(1, n_break), rep(2, n-n_break))

        ## held-out responses and design matrix (with intercept column);
        ## drop = FALSE keeps a matrix even when a single row is held out
        is_test <- id_use == 0
        y_test  <- yfull[is_test]
        X_test1 <- cbind(rep(1, n0), Xfull[is_test, , drop = FALSE])

        ## samples used to train the oracle model, one mask per true regime
        oracle_id <- list(state_true == 1 & id_use == 1, state_true == 2 & id_use == 1)

        ## fit lasso **********************************************************

        ## 1) residual break test and recover break point *********************
        out         <- glmnet(y = y, x= X, alpha = 1)
        BIC         <- deviance(out) / var(y) + log(length(y)) * out$df / length(y)
        la_coef     <- coef(out)[,which.min(BIC)]
        resid       <- y - (X %*%  la_coef[-1] + la_coef[1])
        resid_break <- MCMCpack::MCMCresidualBreakAnalysis(resid)
        ## column-wise median of the "s.store" attribute, rounded to a state id
        state_id    <- round(apply(attr(resid_break, "s.store"), 2, median))

        ## Each regime needs at least two observations for the per-state fit.
        ## tabulate(nbins = 2) also catches a regime with *zero* observations,
        ## which table() silently omits.
        if (any(tabulate(state_id, nbins = 2) < 2)) {
          state_id[1:2] <- 1; state_id[(n1-1):n1] <- 2
        }

        ## fit state by state
        in_s1 <- state_id == 1
        in_s2 <- state_id == 2
        out_la1     <- glmnet(y = y[in_s1], x = X[in_s1, , drop = FALSE])
        out_la2     <- glmnet(y = y[in_s2], x = X[in_s2, , drop = FALSE])
        BIC1 <- deviance(out_la1) / var(y[in_s1]) + log(sum(in_s1)) * out_la1$df / sum(in_s1)
        BIC2 <- deviance(out_la2) / var(y[in_s2]) + log(sum(in_s2)) * out_la2$df / sum(in_s2)

        ## make out of sample prediction
        ## predict state: draw states with the in-sample state frequencies and
        ## sort them so that state 1 precedes state 2 along the held-out rows
        my.prob <- tabulate(state_id, nbins = 2) / length(state_id)
        state.mixed <- sample(c(1:2), n0, replace = TRUE, prob = my.prob)
        state.hat <- sort(state.mixed)

        la_coef_full <- rbind(coef(out_la1)[,which.min(BIC1)], coef(out_la2)[,which.min(BIC2)]) ## this include intercepts
        la_predict <- X_test1 %*% t(la_coef_full)

        ## pick, for every held-out row, the prediction of its assigned state
        la_y_pred  <- la_predict[cbind(seq_len(n0), state.hat)]
        l2_error[iter] <- sqrt(sum((la_y_pred - y_test)^2)) / n0 #/ sqrt(sum(beta^2))

        ## 2) oracle break point (fit two different lasso) ********************
        y_ora1 <- yfull[oracle_id[[1]]]
        y_ora2 <- yfull[oracle_id[[2]]]
        out_la1_ora  <- glmnet(y = y_ora1, x = Xfull[oracle_id[[1]],], alpha = 1)
        out_la2_ora  <- glmnet(y = y_ora2, x = Xfull[oracle_id[[2]],], alpha = 1)

        BIC1_ora     <- deviance(out_la1_ora) / var(y_ora1) + log(length(y_ora1)) * out_la1_ora$df / length(y_ora1)
        BIC2_ora     <- deviance(out_la2_ora) / var(y_ora2) + log(length(y_ora2)) * out_la2_ora$df / length(y_ora2)
        best1_ora    <- which.min(BIC1_ora)
        best2_ora    <- which.min(BIC2_ora)

        # compute L2 error on the held-out rows of each true regime
        oracle_test_id <- list(state_true == 1 & id_use == 0, state_true == 2 & id_use == 0)
        la_pred1_ora <- Xfull[oracle_test_id[[1]], , drop = FALSE] %*% coef(out_la1_ora)[-1, best1_ora] + coef(out_la1_ora)[1, best1_ora]
        la_pred2_ora <- Xfull[oracle_test_id[[2]], , drop = FALSE] %*% coef(out_la2_ora)[-1, best2_ora] + coef(out_la2_ora)[1, best2_ora]
        la_pred_ora_full <- c(la_pred1_ora, la_pred2_ora)
        save_lasso_ora[iter] <- sqrt(sum((la_pred_ora_full - y_test)^2)) / n0


        ## fit SparseChange ***************************************************
        out1 <- BridgeChangeReg(y~X, mcmc= 100, burn = 100, thin=1, verbose=0, n.break = 1)
        beta.bridge <- coef_bridge(out1)
        beta0.bridge <- apply(attr(out1, 'intercept'), 2, mean)
        state_id    <- round(apply(attr(out1, "s.store"), 2, median))

        ## tabulate() keeps a length-2 probability vector even if only one
        ## state shows up, so sample() does not fail on a length mismatch
        my.prob <- tabulate(state_id, nbins = 2) / length(state_id)
        state.mixed <- sample(c(1:2), n0, replace = TRUE, prob = my.prob)
        state.hat <- sort(state.mixed)

        ## first p coefficients belong to regime 1, the next p to regime 2
        ## (was hard-coded as 1:200 / 201:400)
        beta.bridge.full <- rbind(c(beta0.bridge[1], beta.bridge[1:p]),
                                  c(beta0.bridge[2], beta.bridge[(p + 1):(2 * p)]))
        la_predict <- X_test1 %*% t(beta.bridge.full)

        bridge_pred <- la_predict[cbind(seq_len(n0), state.hat)]
        save_bridge[iter]  <- sqrt(sum((bridge_pred - y_test)^2)) / n0

        ## Disabled alternatives below; each needs its own save_* accumulator
        ## (rep(NA, n_iter)) to be re-created before it is re-enabled.
        # ## fit bayesian lasso *************************************************
        # out_blasso <- blasso(X = X, y = y, T = 200)
        # beta.blasso <- apply(out_blasso$beta[101:100, ], 2, mean)
        # save_blasso[iter]  <- sqrt(sum((beta - beta.blasso)^2)) / sqrt(sum(beta^2))

        # ## fit ridge ********************************************************
        # rg_out <- cv.glmnet(y = y, x= X, alpha = 0, nfolds = 3)
        # save_ridge[iter] <- sqrt(sum((coef(rg_out, s = "lambda.min")[-1,] - beta)^2)) / sqrt(sum(beta^2))
        #
        # ## fit elastic net **************************************************
        # en_out <- cv.glmnet(y = y, x= X, alpha = 0.5, nfolds = 3)
        # save_en[iter] <- sqrt(sum((coef(en_out, s = "lambda.min")[-1,] - beta)^2)) / sqrt(sum(beta^2))

      }

      ## summarise the replications by their median error
      mse.lasso     <- median(l2_error, na.rm = TRUE)
      mse.bridge    <- median(save_bridge, na.rm = TRUE)
      mse.lasso_ora <- median(save_lasso_ora, na.rm = TRUE)
      # mse.blasso    <- median(save_blasso, na.rm = TRUE)
      # mse.ridge  <- median(save_ridge, na.rm = TRUE)
      # mse.elastic <- median(save_en, na.rm = TRUE)

      ## value of the foreach body: one row of sim_out
      c(mse.lasso, mse.lasso_ora, mse.bridge) #, mse.ridge, mse.elastic)

    })
    cat("\nCurrent simulation is ", i, " iteration.\n")
    saveRDS(sim_out, file = paste0("./change/corr07/cv/res/sim_cv_change_mse_corr07_", i, ".rds"))

  }, finally = {
    stopCluster(cl)
    stopImplicitCluster()
  })
}
